test.ts.ts ➔ isUrl   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 2
1
import 'async';
2
import 'assert';
3
import 'should';
4
5
/**
 * Checks whether a string can be parsed as a URL.
 *
 * Validation is delegated entirely to the `URL` constructor: input it
 * accepts yields `true`, input it throws on yields `false`.
 *
 * @param url - Candidate string to validate.
 * @returns `true` when `new URL(url)` succeeds, otherwise `false`.
 */
function isUrl(url: string): boolean {
  let parsed = true;
  try {
    new URL(url);
  } catch {
    // The constructor threw, so the input is not a parseable URL.
    parsed = false;
  }
  return parsed;
}
14
15
import Sitemapper from '../../lib/assets/sitemapper.js';
16
import { SitemapperResponse } from '../../sitemapper';
17
let sitemapper: Sitemapper;
18
19
describe('Sitemapper', function () {
20
  // Replace the shared instance with a default-configured Sitemapper before
  // every test so settings changed by one test do not carry over.
  beforeEach(function () {
    sitemapper = new Sitemapper();
  });
23
24
  // Checks the public surface of Sitemapper: its methods exist, constructor
  // options are stored, and properties can be reassigned.
  //
  // should.js type assertions are methods (this file already calls
  // `.false()` and `.String()` elsewhere). Referencing one without calling it,
  // e.g. `x.should.be.Function;`, asserts nothing, so each is invoked here.
  describe('Sitemapper Class', function () {
    it('should have initializeTimeout method', () => {
      sitemapper.initializeTimeout.should.be.Function();
    });

    it('should have crawl method', () => {
      sitemapper.crawl.should.be.Function();
    });

    it('should have parse method', () => {
      sitemapper.parse.should.be.Function();
    });

    it('should have fetch method', () => {
      sitemapper.fetch.should.be.Function();
    });

    it('should construct with a url', () => {
      sitemapper = new Sitemapper({
        url: 'google.com',
      });
      sitemapper.url.should.equal('google.com');
    });

    it('should construct with a timeout', () => {
      sitemapper = new Sitemapper({
        timeout: 1000,
      });
      sitemapper.timeout.should.equal(1000);
    });

    it('should set timeout', () => {
      sitemapper.timeout = 1000;
      sitemapper.timeout.should.equal(1000);
    });

    it('should set url', () => {
      // NOTE(review): a number is assigned to `url` here; presumably the
      // property is meant to hold a string - confirm this is intentional.
      sitemapper.url = 1000;
      sitemapper.url.should.equal(1000);
    });

    it('should construct with specific fields', () => {
      sitemapper = new Sitemapper({
        fields: { loc: true, lastmod: true, priority: true, changefreq: true },
      });
      // Two independent assertions instead of chaining them with `&&`.
      sitemapper.fields.should.be.Object();
      sitemapper.fields.should.have.keys(
        'loc',
        'lastmod',
        'priority',
        'changefreq'
      );
    });
  });
78
79
  // Each test calls `sitemapper.fetch` with a public sitemap URL and checks
  // the resolved response. The per-test timeout is raised to 30s.
  //
  // Tests are async functions: a rejected fetch or a failed assertion rejects
  // the returned promise, which mocha reports as a failure, so no manual
  // `done` / `.catch` plumbing is needed.
  //
  // should.js assertions such as `.Array()` and `.true()` are methods and are
  // called here; merely referencing them (`.should.be.Array;`) asserts nothing.
  describe('fetch Method resolves sites to array', function () {
    it('https://wp.seantburke.com/sitemap.xml sitemaps should be an array', async function () {
      this.timeout(30000);
      const url = 'https://wp.seantburke.com/sitemap.xml';
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.url.should.equal(url);
      data.sites.length.should.be.above(2);
      isUrl(data.sites[0]).should.be.true();
    });

    it('gibberish.gibberish should fail silently with an empty array', async function () {
      this.timeout(30000);
      const url = 'http://gibberish.gibberish';
      sitemapper.debug = true;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.errors.should.be.Array();
      data.errors.length.should.be.greaterThan(0);
      console.log(data);
    });

    it('https://webflow.com/sitemap.xml sitemaps should be an array', async function () {
      this.timeout(30000);
      const url = 'https://webflow.com/sitemap.xml';
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.url.should.equal(url);
      data.sites.length.should.be.above(2);
      isUrl(data.sites[0]).should.be.true();
    });

    it('https://wp.seantburke.com/sitemap.xml sitemaps should contain extra fields', async function () {
      this.timeout(30000);
      const url = 'https://wp.seantburke.com/sitemap.xml';
      // Request every supported field so each site entry is an object
      // rather than a bare URL string.
      sitemapper = new Sitemapper({
        fields: {
          loc: true,
          lastmod: true,
          priority: true,
          changefreq: true,
          sitemap: true,
        },
      });
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.url.should.equal(url);
      data.sites.length.should.be.above(2);
      data.sites[0].loc.should.be.String();
      data.sites[0].lastmod.should.be.String();
      data.sites[0].priority.should.be.String();
      data.sites[0].changefreq.should.be.String();
      data.sites[0].should.have.property('sitemap').which.is.a.String();
      isUrl(data.sites[0].sitemap).should.be.true();
    });

    it('https://www.golinks.io/sitemap.xml sitemaps should be an array', async function () {
      this.timeout(30000);
      const url = 'https://www.golinks.io/sitemap.xml';
      sitemapper.timeout = 5000;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.url.should.equal(url);
      data.sites.length.should.be.above(2);
      isUrl(data.sites[0]).should.be.true();
    });

    it('https://www.golinks.io/sitemap.xml sitemaps should return an empty array when timing out', async function () {
      this.timeout(30000);
      const url = 'https://www.golinks.io/sitemap.xml';
      // A 1ms request timeout is used to provoke the timeout path.
      sitemapper.timeout = 1;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.errors.should.be.Array();
      console.log(data);
    });

    it('https://www.golinks.com/blog/sitemap.xml sitemaps should return an empty array when timing out', async function () {
      this.timeout(30000);
      const url = 'https://www.golinks.com/blog/sitemap.xml';
      sitemapper.timeout = 10000;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.errors.should.be.Array();
    });

    it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', async function () {
      this.timeout(30000);
      const url = 'https://www.banggood.com/sitemap/category.xml.gz';
      sitemapper.timeout = 10000;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.sites.length.should.be.greaterThan(0);
    });
  });
238
239
  // Tests run with a Sitemapper configured to send an `Accept-Encoding`
  // request header. Tests are async so a rejected fetch or failed assertion
  // is reported by mocha directly; should.js type assertions are called as
  // methods because referencing them without calling asserts nothing.
  describe('gzipped sitemaps', function () {
    beforeEach(() => {
      sitemapper = new Sitemapper({
        requestHeaders: {
          'Accept-Encoding': 'gzip,deflate,sdch',
        },
      });
    });

    it('https://www.banggood.com/sitemap/category.xml.gz gzip should be a non-empty array', async function () {
      this.timeout(30000);
      const url = 'https://www.banggood.com/sitemap/category.xml.gz';
      sitemapper.timeout = 10000;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.errors.should.be.Array();
      data.sites.length.should.be.greaterThan(0);
    });

    // NOTE(review): the title says "should not allow insecure request", yet
    // the test sets `rejectUnauthorized = false` and expects a 404 HTTPError
    // entry - confirm the title matches the intended behaviour.
    it('https://example.com/sitemap.xml should not allow insecure request', async function () {
      this.timeout(30000);
      const url = 'https://example.com/sitemap.xml';
      sitemapper.timeout = 10000;
      sitemapper.rejectUnauthorized = false;
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.errors.should.be.Array();
      data.errors.should.containEql({
        type: 'HTTPError',
        message: 'HTTP Error occurred: Response code 404 (Not Found)',
        url: 'https://example.com/sitemap.xml',
        retries: 0,
      });
    });
  });
290
291
  // Fetches a sitemap that the suite title describes as using namespace
  // prefixes, and checks that site URLs are still extracted.
  describe('sitemaps with namespace prefix', function () {
    it('https://www.emerald.com/sitemap.xml sitemaps should be an array', async function () {
      this.timeout(30000);
      const url = 'https://www.emerald.com/sitemap.xml';
      // Async test: a rejected fetch or failed assertion fails the test
      // through the returned promise.
      const data = await sitemapper.fetch(url);
      // should.js assertions are methods; they must be called to assert.
      data.sites.should.be.Array();
      data.url.should.equal(url);
      data.sites.length.should.be.above(2);
      isUrl(data.sites[0]).should.be.true();
    });
  });
310
311
  // Exercises the callback-style `getSites(url, callback)` API.
  describe('getSites method', function () {
    it('getSites should be backwards compatible', function (done) {
      this.timeout(30000);
      const url = 'https://wp.seantburke.com/sitemap.xml';
      // `err` is intentionally not truth-tested: what getSites passes on
      // success is not visible from this file.
      sitemapper.getSites(url, (err, sites) => {
        // Route assertion failures into `done` so mocha reports them instead
        // of the exception being thrown inside the library's callback call.
        try {
          // should.js assertions are methods; they must be called to assert.
          sites.should.be.Array();
          isUrl(sites[0]).should.be.true();
          done();
        } catch (assertionError) {
          done(assertionError);
        }
      });
    });
  });
322
323
  // Verifies that `exclusions` regexes filter fetched site URLs: unrelated
  // patterns leave a known URL in place, a matching pattern removes it.
  //
  // `.true()` / `.false()` are should.js methods and are called here; the
  // bare property form (`.should.be.true;`) would let both tests pass no
  // matter what `includes` returned.
  describe('exclusions option', function () {
    it('should prevent false positive', async function () {
      this.timeout(30000);
      const url = 'https://wp.seantburke.com/sitemap.xml';
      sitemapper.exclusions = [/video/, /image/];
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.sites
        .includes('https://wp.seantburke.com/?page_id=2')
        .should.be.true();
    });

    it('should filter out page_id urls', async function () {
      this.timeout(30000);
      const url = 'https://wp.seantburke.com/sitemap.xml';
      sitemapper.exclusions = [/page_id/];
      const data = await sitemapper.fetch(url);
      data.sites.should.be.Array();
      data.sites
        .includes('https://wp.seantburke.com/?page_id=2')
        .should.be.false();
    });
  });
360
361
  // Unit tests for `isExcluded(url)`: it is expected to return true when the
  // URL matches at least one regex in `sitemapper.exclusions`, false otherwise.
  describe('isExcluded method', function () {
    it('should return false when no exclusions are set', function () {
      const result = sitemapper.isExcluded('https://example.com/page1');
      result.should.be.false();
    });

    it('should return false when url does not match any exclusion patterns', function () {
      sitemapper.exclusions = [/\.pdf$/, /private/];
      const result = sitemapper.isExcluded('https://example.com/page1');
      result.should.be.false();
    });

    // Title corrected: it previously said "should return false" while the
    // test asserts `true`.
    it('should return true when url matches an exclusion pattern', function () {
      sitemapper.exclusions = [/\.pdf$/, /private/];
      const result = sitemapper.isExcluded('https://example.com/document.pdf');
      result.should.be.true();
    });

    it('should return true when url matches any of multiple exclusion patterns', function () {
      sitemapper.exclusions = [/\.pdf$/, /private/, /temp/];
      const result = sitemapper.isExcluded(
        'https://example.com/private/temp.html'
      );
      result.should.be.true();
    });

    it('should handle complex regex patterns correctly', function () {
      // Anchored pattern: a two-letter path segment followed by /private.
      sitemapper.exclusions = [/^https:\/\/example\.com\/([a-z]{2})\/private/];
      const result1 = sitemapper.isExcluded(
        'https://example.com/en/private/page'
      );
      const result2 = sitemapper.isExcluded(
        'https://example.com/en/public/page'
      );
      result1.should.be.true();
      result2.should.be.false();
    });

    it('should handle case sensitivity correctly', function () {
      // The `i` flag makes the pattern match regardless of letter case.
      sitemapper.exclusions = [/private/i];
      const result1 = sitemapper.isExcluded('https://example.com/PRIVATE/page');
      const result2 = sitemapper.isExcluded('https://example.com/Private/page');
      result1.should.be.true();
      result2.should.be.true();
    });
  });
407
});
408